package com.zyc.transfrom;



import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document.OutputSettings;
import org.w3c.dom.Document;  
  
/** 
 * Created by Carey on 15-2-2. 
 */  
public class Word2Html {  
	public static String  DOC = "source/doc"; 
	public static String  HTML = "source/html";
	public static String FILE_SEPARATOR = System.getProperty("file.separator");
    public static void main(String argv[]) {  
        try {  
        	String sourceDocFile /*= Word2Html.class.getClassLoader().getResource("11.docx").getPath()*/;
        	sourceDocFile = "D:\\11.doc";
        	String fileName = new File(sourceDocFile).getName();
        	System.out .println(fileName);
            String destHtmlFile = sourceDocFile.replace(DOC, HTML);
        	convert2Html(sourceDocFile,destHtmlFile);  
        	System.out .println(destHtmlFile);
        } catch (Exception e) {  
            e.printStackTrace();  
        }  
    }  
    public void process(String sSourceDocFile ,String destFolder) {
    	try {  
    		File sourceDocFile = new File(sSourceDocFile);
    		String fileName = sourceDocFile.getName();
    		String sDestFile = destFolder + fileName.replace(DOC, HTML);
            //String destHtmlFile = sSourceDocFile.replace(DOC, HTML);
        	convert2Html(sSourceDocFile,sDestFile);  
        } catch (Exception e) {  
            e.printStackTrace();  
        } 
    }
    //输出html文件   
    public static void writeFile(String content, String path) {  
        FileOutputStream fos = null;   
        BufferedWriter bw = null;  
        org.jsoup.nodes.Document doc=Jsoup.parse(content, "utf-8");
        org.jsoup.nodes.Document.OutputSettings setting=new OutputSettings();	
        setting.syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
        doc.outputSettings(setting);
        //System.out.println(doc.html().substring(0, 400));
        content=doc.html();  
        content = content.replace("&nbsp;", " ");
        try {  
            File file = new File(path);  
            fos = new FileOutputStream(file);  
            bw = new BufferedWriter(new OutputStreamWriter(fos,"UTF-8"));  
            bw.write(content);  
        } catch (FileNotFoundException fnfe) {  
            fnfe.printStackTrace();  
        } catch (IOException ioe) {  
            ioe.printStackTrace();  
        } finally {  
            try {  
                if (bw != null)  
                    bw.close();   
                if (fos != null)  
                    fos.close();  
            } catch (IOException ie) {  
            }  
        }  
    }  
  
    //word 转 html   
    public static void convert2Html(String fileName, String outPutFile)  
            throws TransformerException, IOException,  
            ParserConfigurationException {  
  
        //HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));
    	final String sPicSavePath = outPutFile.replace(".html", "") + FILE_SEPARATOR ;
    	if(! new File(sPicSavePath).exists()) {
    		new File(sPicSavePath).mkdirs();
    	}
    	InputStream iStream = new FileInputStream(fileName);
        HWPFDocument wordDocument  = new HWPFDocument(iStream);
        //WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));  
         //兼容2007 以上版本  
//        XSSFWorkbook  xssfwork=new XSSFWorkbook(new FileInputStream(fileName));  
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(  
                DocumentBuilderFactory.newInstance().newDocumentBuilder()  
                        .newDocument());  
        wordToHtmlConverter.setPicturesManager( new PicturesManager()  
        {  
            public String savePicture( byte[] content,  
                                       PictureType pictureType, String suggestedName,  
                                       float widthInches, float heightInches )  
            {  
            	//String sPicSavePath = outPutFile.substring(0, outPutFile.lastIndexOf(Word2Html.FILE_SEPARATOR));
            	
                return  sPicSavePath + suggestedName;  
            	
            //	return "D:\\test\\"+suggestedName; 
            }  
        } );  
        wordToHtmlConverter.processDocument(wordDocument);  
        //save pictures  
        List pics=wordDocument.getPicturesTable().getAllPictures();
        if(pics!=null){  
            for(int i=0;i<pics.size();i++){  
                Picture pic = (Picture)pics.get(i);  
                try {  
                    pic.writeImageContent(new FileOutputStream(sPicSavePath  
                            + pic.suggestFullFileName()));  
                } catch (FileNotFoundException e) {  
                    e.printStackTrace();  
                }  
            }  
        }  
        Document htmlDocument = wordToHtmlConverter.getDocument();  
  
        ByteArrayOutputStream out = new ByteArrayOutputStream();  
        DOMSource domSource = new DOMSource(htmlDocument);  
        StreamResult streamResult = new StreamResult(out);  
  
  
        TransformerFactory tf = TransformerFactory.newInstance();  
        Transformer serializer = tf.newTransformer();  
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");  
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");  
        serializer.setOutputProperty(OutputKeys.METHOD, "HTML");  
        serializer.transform(domSource, streamResult);  
        out.close();  
        writeFile(new String(out.toByteArray()), outPutFile);  
    }  
}  
